In this section, we only focus on analysis at either pick up or drop off tract level (not zone to zone or OD pairs). The following figure show aggregate-level summary statistics at pick up tracts and drop off tracts.
pickups <- data %>%
group_by(GEOID = pickup_census_tract) %>%
summarise("sum_trip" = n(), # total number of trips
"sum_trip_shared_ok" = sum(shared_authorized > 0), # number of trip authorized to be shared
"sum_trip_shared" = sum(pooled[shared_authorized > 0] > 0)) %>% # number of trips with successful shared match
mutate("ratio_shared_ok" = sum_trip_shared_ok / sum_trip,
"ratio_matched" = sum_trip_shared / sum_trip_shared_ok) %>%
mutate(GEOID = as.character(GEOID)) %>%
drop_na()
pickups %>%
select(ratio_shared_ok, ratio_matched, GEOID) %>%
pivot_longer(c("ratio_shared_ok", "ratio_matched")) %>%
ggplot()+
geom_violin(aes(x= name, y= value)) +
geom_boxplot(aes(x = name, y = value ), width = 0.02) +
theme_bw() + labs(title = "By Pick Up Tract", x = "", y = "Ratio") +
scale_x_discrete(labels=c("ratio_shared_ok" = "ratio of trips\nauthorized to\nbe shared",
"ratio_matched" = "ratio of shared\nauthorized trips\nsuccessfully matched"))
# Function to cut and create labels
source("~/Desktop/Chicago-TNC-analysis/quantiles_cut.R")
pickups <- quantiles_cut(input = pickups, variable = "sum_trip", n_cut = 10, n_round = 0)
## Warning: rename_() is deprecated.
## Please use rename() instead
##
## The 'programming' vignette or the tidyeval book can help you
## to program with rename() : https://tidyeval.tidyverse.org
## This warning is displayed once per session.
chicago %>% left_join(pickups, by = "GEOID") %>% filter(!is.na(sum_trip_cut)) %>%
ggplot(aes(fill = sum_trip_cut)) +
geom_sf(color = NA) +
theme_bw() +
labs(fill = "" ,
title = "Number of Total Trips",
subtitle = "By Pick Up Census Tract")+
scale_fill_viridis_d(direction = -1)
pickups <- quantiles_cut(input = pickups, variable = "ratio_shared_ok", n_cut = 10)
chicago %>% left_join(pickups, by = "GEOID") %>% filter(!is.na(ratio_shared_ok_cut)) %>%
ggplot(aes(fill = ratio_shared_ok_cut)) +
geom_sf(color = NA) +
theme_bw() +
labs(fill = "" ,
title = "Ratio of Trips Authorized for Shared Ride",
subtitle = "By Pick Up Census Tract")+
scale_fill_viridis_d(direction = 1)
dropoffs <- data %>%
group_by(GEOID = dropoff_census_tract) %>%
summarise("sum_trip" = n(), # total number of trips
"sum_trip_shared_ok" = sum(shared_authorized > 0), # number of trip authorized to be shared
"sum_trip_shared" = sum(pooled[shared_authorized > 0] > 0)) %>% # number of trips with successful shared match
mutate("ratio_shared_ok" = sum_trip_shared_ok / sum_trip,
"ratio_matched" = sum_trip_shared / sum_trip_shared_ok) %>%
mutate(GEOID = as.character(GEOID)) %>%
drop_na()
dropoffs %>%
select(ratio_shared_ok, ratio_matched, GEOID) %>%
pivot_longer(c("ratio_shared_ok", "ratio_matched")) %>%
ggplot()+
geom_violin(aes(x= name, y= value)) +
geom_boxplot(aes(x = name, y = value ), width = 0.02) +
theme_bw() + labs(title = "By Drop Off Tract", x = "", y = "Ratio") +
scale_x_discrete(labels=c("ratio_shared_ok" = "ratio of trips\nauthorized to\nbe shared",
"ratio_matched" = "ratio of shared\nauthorized trips\nsuccessfully matched"))
# Function to cut and create labels
source("~/Desktop/Chicago-TNC-analysis/quantiles_cut.R")
dropoffs <- quantiles_cut(input = dropoffs, variable = "sum_trip", n_cut = 10, n_round = 0)
chicago %>% left_join(dropoffs, by = "GEOID") %>% filter(!is.na(sum_trip_cut)) %>%
ggplot(aes(fill = sum_trip_cut)) +
geom_sf(color = NA) +
theme_bw() +
labs(fill = "" ,
title = "Number of Total Trips",
subtitle = "By Drop Off Census Tract")+
scale_fill_viridis_d(direction = -1)
dropoffs <- quantiles_cut(input = dropoffs, variable = "ratio_shared_ok", n_cut = 10)
chicago %>% left_join(dropoffs, by = "GEOID") %>% filter(!is.na(ratio_shared_ok_cut)) %>%
ggplot(aes(fill = ratio_shared_ok_cut)) +
geom_sf(color = NA) +
theme_bw() +
labs(fill = "" ,
title = "Ratio of Trips Authorized for Shared Ride",
subtitle = "By Drop Off Census Tract")+
scale_fill_viridis_d(direction = 1)
In this section, we focus on OD pair analysis. This means, we aggregate trips between each two census tract. We observe 136074 OD pairs. Following figures show descriptive stats of zone-to-zone levels (OD pairs). In other words, we group the data to pairs of origin-destination where the new dataframe has 136074 rows.
ODPAIRS <- data %>%
group_by(pickup_census_tract, dropoff_census_tract) %>%
summarise("sum_trip" = n(), # total number of trips
"sum_trip_shared_ok" = sum(shared_authorized > 0), # number of trip authorized to be shared
"sum_trip_shared" = sum(pooled[shared_authorized > 0] > 0)) %>% # number of trips with successful shared match
mutate("ratio_shared_ok" = sum_trip_shared_ok / sum_trip,
"ratio_matched" = sum_trip_shared / sum_trip_shared_ok) %>%
drop_na()
ODPAIRS %>%
select(ratio_shared_ok, ratio_matched, pickup_census_tract, dropoff_census_tract) %>%
drop_na() %>%
pivot_longer(c("ratio_shared_ok", "ratio_matched")) %>%
ggplot()+
geom_violin(aes(x= name, y= value)) +
geom_boxplot(aes(x = name, y = value ), width = 0.02) +
theme_bw() + labs(title = "OD Pairs", x = "", y = "Ratio") +
scale_x_discrete(labels=c("ratio_shared_ok" = "ratio of trips\nauthorized to\nbe shared",
"ratio_matched" = "ratio of shared\nauthorized trips\nsuccessfully matched"))
# summary statistics of number of trips between each two tracts
summary(ODPAIRS$sum_trip)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 3.0 11.0 129.9 44.0 64124.0
# summary statistics of ratio of trips between each two tracts authorized to be shared
summary(ODPAIRS$ratio_shared_ok)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.004464 0.142857 0.294118 0.393475 0.500000 1.000000
# summary statistics of ratio of MATCHED trips between each two tracts which were authorized to be shared (success rate)
summary(ODPAIRS$ratio_matched)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.5000 0.8000 0.6969 1.0000 1.0000
The following maps needs some explanation to be clear. In the previous section, after finding the OD pairs, I found the share of trips that were authorized to be shared (ratio of willingness to pool (WTP) between two tracts).
Here, I average ratio of WTP over the pick up tract. It means that I group OD pairs by the pick up tract and then average the ratio of WTP. The message that the figure carries is not substantially different from other charts: WTP is higher in south and west neighborhoods.
ODPAIRS_average_shared_ratio <- ODPAIRS %>%
group_by(pickup_census_tract) %>%
summarise("average_ratio_shared_ok_pickup" = mean(ratio_shared_ok)) %>%
mutate(pickup_census_tract = as.character(pickup_census_tract))
chicago %>% left_join(ODPAIRS_average_shared_ratio, by = c("GEOID" = "pickup_census_tract")) %>% filter(!is.na(average_ratio_shared_ok_pickup)) %>%
ggplot(aes(fill = average_ratio_shared_ok_pickup)) +
geom_sf(color = NA) +
theme_bw() +
labs(fill = "" ,
title = "Average Ratio of Shared Authorized Trip in OD Pairs",
subtitle = "(Averaged Over Pick Up Census Tract)")+
scale_fill_viridis(direction = -1, limits = c(0,1))
pickups <- data %>%
mutate("start_central" = if_else(start_neighborhood == "Central", true = 1, 0),
"start_no_central" = if_else(start_neighborhood != "Central", true = 1, 0)) %>%
group_by(start_date) %>%
summarise("central_trips" = sum(start_central, na.rm = T), # daily number of trips in central
"no_central_trips" = sum(start_no_central, na.rm = T), # daily number of trips NOT in central
"pct_sharedOK_central_trips" = (sum(start_central[shared_authorized > 0], na.rm = T) / sum(start_central, na.rm = T)),
# daily percent of total rides that are (authorized to be) shared in central
"pct_sharedOK_no_central_trips" = (sum(start_no_central[shared_authorized > 0], na.rm = T) / sum(start_no_central, na.rm = T))
# daily percent of total rides that are (authorized to be) shared OUT of central
) %>%
mutate(start_date = as.Date(start_date))
ggplot(pickups, aes(x = start_date))+
geom_line(aes(y = central_trips, color = "Number of Trips Started in Central")) +
geom_line(aes(y = no_central_trips, color = "Number of Trips Started OUT of Central")) +
scale_colour_manual(values = c("blue", "red")) +
scale_y_continuous(labels = comma)+ theme_bw()+
theme(legend.position="bottom",
legend.title = element_blank(), legend.text=element_text(size=8)) +
scale_x_date(breaks=date_breaks("week")) +
labs(title = "Daily Total Number of Trips",
subtitle = "started in central and out of central",
y = "", x = "")
ggplot(pickups, aes(x = start_date))+
geom_line(aes(y = 100 * pct_sharedOK_central_trips, color = "Percentage of Trips Started in Central\nWhich Authorized Shared Ride")) +
geom_line(aes(y = 100* pct_sharedOK_no_central_trips, color = "Percentage of Trips Started OUT of Central\nWhich Authorized Shared Ride")) +
scale_colour_manual(values = c("blue", "red")) +
scale_y_continuous(labels = comma)+ theme_bw()+
theme(legend.position="bottom",
legend.title = element_blank(), legend.text=element_text(size=8)) +
scale_x_date(breaks=date_breaks("week")) +
labs(title = "Daily Percentage of Trips Authorized Shared Ride",
subtitle = "started in central and out of central",
y = "%", x = "") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
pickups1 <- data %>%
mutate("start_central_6to22" = if_else(start_neighborhood == "Central" &
between(start_hour, 6, 21), true = 1, 0),
) %>%
group_by(start_date) %>%
summarise("pct_surcharge_central_trips" = (sum(start_central_6to22[shared_authorized < 1], na.rm = T) / sum(start_central_6to22, na.rm = T))
# percentage of trips in central between 6 AM to 10 PM which did not authorize to be shared
) %>%
mutate(start_date = as.Date(start_date))
g.subtitle <- expression(paste("Which ", bold("Unauthorized")," Shared Ride"))
ggplot(pickups1, aes(x = start_date))+
geom_line(aes(y = 100 * pct_surcharge_central_trips)) + theme_bw()+
theme(legend.position="bottom",
legend.title = element_blank(), legend.text=element_text(size=8)) +
scale_x_date(breaks=date_breaks("week")) +
labs(title = "Daily Percentage of Trips in Central Started Between 6 AM & 10",
subtitle = g.subtitle, y = "%", x = "",
caption = "This trips will be assessed a $3 tax under new policy")+
theme(axis.text.x = element_text(angle = 90, hjust = 1))
pickups <- data %>%
mutate("start_central" = if_else(start_neighborhood == "Central", true = 1, 0),
"start_no_central" = if_else(start_neighborhood != "Central", true = 1, 0)) %>%
group_by(hour=floor_date(start, "hour")) %>%
summarise("central_trips" = sum(start_central, na.rm = T), # daily number of trips in central
"no_central_trips" = sum(start_no_central, na.rm = T), # daily number of trips NOT in central
"pct_sharedOK_central_trips" = (sum(start_central[shared_authorized > 0], na.rm = T) / sum(start_central, na.rm = T)),
# daily percent of total rides that are (authorized to be) shared in central
"pct_sharedOK_no_central_trips" = (sum(start_no_central[shared_authorized > 0], na.rm = T) / sum(start_no_central, na.rm = T))
# daily percent of total rides that are (authorized to be) shared OUT of central
)
ggplot(pickups, aes(x = hour))+
geom_line(aes(y = central_trips, color = "Number of Trips Started in Central")) +
geom_line(aes(y = no_central_trips, color = "Number of Trips Started OUT of Central")) +
scale_colour_manual(values = c("blue", "red")) +
scale_y_continuous(labels = comma)+ theme_bw()+
theme(legend.position="bottom",
legend.title = element_blank(), legend.text=element_text(size=10)) +
scale_x_datetime(breaks=date_breaks("week"), date_labels = "%m/%d") +
labs(title = "Hourly Total Number of Trips",
subtitle = "started in central and out of central",
y = "", x = "") + theme(axis.text.x = element_text(angle = 90, hjust = 1))
ggplot(pickups, aes(x = hour))+
geom_line(aes(y = 100 * pct_sharedOK_central_trips, color = "Percentage of Trips Started in Central\nWhich Authorized Shared Ride")) +
geom_line(aes(y = 100* pct_sharedOK_no_central_trips, color = "Percentage of Trips Started OUT of Central\nWhich Authorized Shared Ride")) +
scale_colour_manual(values = c("blue", "red")) +
scale_y_continuous(labels = comma)+ theme_bw()+
theme(legend.position="bottom",
legend.title = element_blank(), legend.text=element_text(size=10)) +
scale_x_datetime(breaks=date_breaks("week"), date_labels = "%m/%d") +
labs(title = "Hourly Percentage of Trips Authorized Shared Ride",
subtitle = "started in central and out of central",
y = "%", x = "") + theme(axis.text.x = element_text(angle = 90, hjust = 1))